library("dplyr")
library("tidyr")
library("ggplot2")
library("GenomicRanges")
library("patchwork")
sample_names <- c("DLBCL", "mixA")
refs <- c("control", "mixB")
comparisons <- c(paste0(sample_names, "_vs_", refs))
chess_files <- list.files(file.path(basedir, "data", "chess", comparisons), full.names = TRUE, pattern = "25kb.txt")
names(chess_files) <- sapply(chess_files, function(path){
parts <- tail(strsplit(path, "/")[[1]], 2)
gsub(".txt|genome_scan", "", paste(parts, collapse = ""))
})
chess_comparisons_DLBCL <- lapply(chess_files, function(f){
read.table(f, sep = "\t", header = TRUE)
}) %>% bind_rows(.id = "comparison") %>%
extract(comparison, regex = "(.*)_vs_(.*)_window([[:alnum:]]+Mb)_step([[:alnum:]]+[k|M]b)_([[:alnum:]]+kb)",
into = c("query", "ref", "window_size", "step_size", "resolution"),
remove = FALSE) %>%
mutate(ref = factor(ref, levels = unique(refs)),
query = factor(query, levels = unique(sample_names))) %>%
dplyr::filter(!is.na(ssim))
# add genomic position data
pairs_files <- list.files(file.path(basedir, "data", "chess"), pattern = "hg38_pairs_.*.bedpe", full.names = TRUE)
names(pairs_files) <- gsub(".bedpe|hg38_pairs_", "", basename(pairs_files))
comparison_regions <- lapply(pairs_files, function(f){
read.table(f, sep = "\t",
col.names = c("chr", "start", "end", "chr2", "start2", "end2",
"ID", "score", "strand1", "strand2")) %>%
dplyr::select(chr, start, end, ID)
}) %>% bind_rows(.id = "params") %>%
extract(params, regex = "window([[:alnum:]]+Mb)_step([[:alnum:]]+[k|M]b)",
into = c("window_size", "step_size"))
chess_comparisons_DLBCL <- left_join(chess_comparisons_DLBCL, comparison_regions)
chr_order <- paste0("chr", c(1:22, "X", "Y"))
chr_offsets <-
chess_comparisons_DLBCL %>%
dplyr::select(chr, end) %>%
group_by(chr) %>%
summarise(length = max(end)) %>%
mutate(chr = factor(chr, levels = chr_order, ordered = TRUE)) %>%
arrange(chr) %>%
mutate(offset = lag(length, default = 0)) %>%
mutate(offset = cumsum(offset + 20e6)) %>%
mutate(midpoint = length/2) %>%
mutate(tick_pos = offset+midpoint)
to_plot <- chess_comparisons_DLBCL %>%
dplyr::filter(resolution=="25kb") %>%
mutate(chr = factor(chr, levels = chr_order, ordered = TRUE)) %>%
left_join(chr_offsets) %>%
mutate(pos = start + offset) %>%
mutate(category = case_when(
z_ssim <= -1.2 & SN > 0.6 ~ "SN > 0.6 & Z-SSIM < -1.2",
z_ssim <= -1.2 ~ "Z-SSIM < -1.2",
TRUE ~ "Z-SSIM > -1.2"
)) %>%
mutate(category = factor(category, levels = c("Z-SSIM > -1.2", "Z-SSIM < -1.2", "SN > 0.6 & Z-SSIM < -1.2"), ordered = TRUE)) %>%
arrange(category) %>%
tidyr::unite("comparison", query, ref, sep = "_vs_")
all_chrs <- to_plot %>%
ggplot(aes(x = pos, y = -1*z_ssim, colour = category)) +
geom_hline(yintercept = 1.2, colour = "grey", linetype="dotted", size = 1) +
geom_point(size = 0.8) +
scale_colour_manual(values = c("Z-SSIM > -1.2" = "lightgrey", "Z-SSIM < -1.2" = "pink", "SN > 0.6 & Z-SSIM < -1.2" = "darkred")) +
scale_x_continuous(breaks = chr_offsets$tick_pos, labels = chr_offsets$chr) +
scale_y_continuous(breaks = c(-6, -4, -2, 0, 2)) +
theme_classic(base_size = 16) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
labs(x = "", y = "-1 x Z-normalised\nSSIM") +
facet_grid(comparison ~.)
chr2 <- to_plot %>%
dplyr::filter(chr=="chr2") %>%
ggplot(aes(x = start, y = -1*z_ssim, colour = category)) +
geom_hline(yintercept = 1.2, colour = "grey", linetype="dotted", size = 1) +
geom_point(size = 0.8) +
scale_colour_manual(values = c("Z-SSIM > -1.2" = "lightgrey", "Z-SSIM < -1.2" = "pink", "SN > 0.6 & Z-SSIM < -1.2" = "darkred")) +
scale_x_continuous(labels = scales::unit_format(unit = "Mb", scale = 1e-6)) +
scale_y_continuous(breaks = c(-4, -2, 0, 2)) +
theme_classic(base_size = 16) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "chr2", y = "-1 x Z-normalised\nSSIM") +
facet_grid(comparison ~.)
all_chrs
chr2
chess_comparisons_DLBCL_real <- chess_comparisons_DLBCL %>%
dplyr::filter(query=="DLBCL", ref == "control") %>%
dplyr::select(-comparison, -query, -ref)
chess_comparisons_DLBCL_mixed <- chess_comparisons_DLBCL %>%
dplyr::filter(query=="mixA", ref == "mixB") %>%
dplyr::select(-comparison, -query, -ref)
join_cols <- setdiff(colnames(chess_comparisons_DLBCL_real), c("SN", "ssim", "z_ssim"))
chess_comparisons_DLBCL_combined <- chess_comparisons_DLBCL_real %>%
left_join(chess_comparisons_DLBCL_mixed,by = join_cols, suffix = c("_real", "_mixed"))
ssim_corAB <- broom::tidy(cor.test(chess_comparisons_DLBCL_combined$ssim_real,
chess_comparisons_DLBCL_combined$ssim_mixed, method = "pearson"))
p1 <- ggplot(chess_comparisons_DLBCL_combined, aes(x = ssim_real, y = ssim_mixed)) +
geom_point(size = 0.5) +
geom_abline(slope = 1, intercept = 0, linetype = 2, colour = "blue", size = 1) +
# geom_text(data = ssim_corAB, x = 0, y = 0.95, hjust = "left",
# aes(label = paste0("R = ", signif(estimate, 3)))) +
# geom_text(data = ssim_corAB, x = 0, y = 0.95, hjust = "left",
# aes(label = paste0("\n\np = ", format.pval(p.value, 2)))) +
theme_classic(base_size = 10) +
labs(x = "SSIM (DLBCL, control)", y = "SSIM (mixA, mixB)")
SN_corAB <- broom::tidy(cor.test(chess_comparisons_DLBCL_combined$SN_real,
chess_comparisons_DLBCL_combined$SN_mixed, method = "pearson"))
p2 <- ggplot(chess_comparisons_DLBCL_combined, aes(x = SN_real, y = SN_mixed)) +
geom_point(size = 0.5) +
geom_abline(slope = 1, intercept = 0, linetype = 2, colour = "blue", size = 1) +
# geom_text(data = SN_corAB, x = 0.1, y = 0.6, hjust = "left",
# aes(label = paste0("R = ", signif(estimate, 3)))) +
# geom_text(data = SN_corAB, x = 0.1, y = 0.6, hjust = "left",
# aes(label = paste0("\n\np = ", format.pval(p.value, 2)))) +
theme_classic(base_size = 10) +
labs(x = "SN (DLBCL, control)", y = "SN (mixA, mixB)")
Fig1_DLBCL_SSIM_SN_scatterplot <- p1 + p2
Fig1_DLBCL_SSIM_SN_scatterplot
ggsave(file.path(basedir, "figures/final/Fig1_DLBCL_SSIM_SN_scatterplot.pdf"))
bind_rows(SSIM = ssim_corAB, SN = SN_corAB, .id = "variable")
## # A tibble: 2 x 9
## variable estimate statistic p.value parameter conf.low conf.high method
## <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr>
## 1 SSIM 0.979 356. 0 5430 0.978 0.980 Pearson's pr…
## 2 SN 0.767 88.0 0 5430 0.756 0.777 Pearson's pr…
## # … with 1 more variable: alternative <chr>
ssim_wilcoxAB <- broom::tidy(wilcox.test(chess_comparisons_DLBCL_combined$ssim_real,
chess_comparisons_DLBCL_combined$ssim_mixed, paired = TRUE))
SN_wilcoxAB <- broom::tidy(wilcox.test(chess_comparisons_DLBCL_combined$SN_real,
chess_comparisons_DLBCL_combined$SN_mixed, paired = TRUE))
bind_rows(SSIM = ssim_wilcoxAB, SN = SN_wilcoxAB, .id = "variable")
## # A tibble: 2 x 5
## variable statistic p.value method alternative
## <chr> <dbl> <dbl> <chr> <chr>
## 1 SSIM 23305 0 Wilcoxon signed rank test with continu… two.sided
## 2 SN 14755582 0 Wilcoxon signed rank test with continu… two.sided
ssim_diff <- chess_comparisons_DLBCL_combined %>%
mutate(ssim_diff = ssim_real - ssim_mixed)
Fig1_DLBCL_SSIM_diff_chr2 <- ssim_diff %>%
dplyr::filter(chr == "chr2") %>%
ggplot(aes(x = start, y = ssim_diff)) +
geom_line() +
scale_x_continuous(labels = scales::unit_format(unit = "Mb", scale = 1e-6)) +
theme_classic(base_size = 10) +
labs(x = "Position on chr2", y = "SSIM (patient, control)\n- SSIM (mixed)")
# ssim_diff %>%
# arrange(ssim_diff) %>%
# dplyr::filter(chr == "chr2") %>%
# head(n = 10) %>%
# select(chr, start, end) %>%
# unique() %>%
# write.table(file = file.path(basedir, "data/chess/DLBCL_vs_control/chr2_top10_differences_25kb.bed"),
# col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
Fig1_DLBCL_SSIM_diff_chr2
ggsave(file.path(basedir, "figures/final/Fig1_DLBCL_SSIM_diff_chr2.pdf"))
chess_comparisons_DLBCL_combined_ranking <- chess_comparisons_DLBCL_combined %>%
mutate(rank_real = dplyr::min_rank(ssim_real),
rank_mixed = dplyr::min_rank(ssim_mixed)) %>%
mutate(rank_diff_mixed = rank_real - rank_mixed)
Fig1_DLBCL_SSIM_rankdiff_chr2 <- chess_comparisons_DLBCL_combined_ranking %>%
dplyr::filter(chr == "chr2") %>%
ggplot(aes(x = start, y = rank_diff_mixed)) +
geom_line() +
scale_x_continuous(labels = scales::unit_format(unit = "Mb", scale = 1e-6)) +
theme_classic(base_size = 10) +
labs(x = "Position on chr2", y = "SSIM rank difference")
Fig1_DLBCL_SSIM_rankdiff_chr2
ggsave(file.path(basedir, "figures/final/Fig1_DLBCL_SSIM_rankdiff_chr2.pdf"))
top10_ssim_diff <- ssim_diff %>%
arrange(ssim_diff) %>%
dplyr::filter(chr == "chr2") %>%
head(n = 10)
top10_ssim_rankdiff <- chess_comparisons_DLBCL_combined_ranking %>%
arrange(rank_diff_mixed) %>%
dplyr::filter(chr == "chr2") %>%
head(n = 10)
top10_ssim_diff %>%
dplyr::filter(ID %in% top10_ssim_rankdiff$ID) %>%
arrange(start) %>%
tidyr::unite("region", chr, start, end, sep = "-") %>%
pull(region)
## [1] "chr2-31500000-33500000" "chr2-86000000-88000000"
## [3] "chr2-86500000-88500000" "chr2-96000000-98000000"
## [5] "chr2-104000000-106000000" "chr2-145000000-147000000"
## [7] "chr2-146000000-148000000" "chr2-182500000-184500000"
Fig1_DLBCL_SSIM_SN_scatterplot + Fig1_DLBCL_SSIM_diff_chr2 +
plot_layout(widths = c(1.5, 1.5, 5))
ggsave(file.path(basedir, "figures/final/Fig1_DLBCL_SSIM_diff_assembled.pdf"))
Fig1_DLBCL_SSIM_SN_scatterplot + Fig1_DLBCL_SSIM_rankdiff_chr2 +
plot_layout(widths = c(1.5, 1.5, 5))
ggsave(file.path(basedir, "figures/final/Fig1_DLBCL_SSIM_rankdiff_assembled.pdf"))
Of the ten 2 Mb windows with the largest SSIM differences on chr2, 6 are genuine differences. 3 are adjacent to the centromere and contain regions that have mapping artefacts and low coverage and should likely have been filtered in Hi-C processing. One region contains a genuine patient-control difference that nevertheless overlaps a likely mapping artefact.
insulation_score_bigwigs <- list.files(file.path(basedir, "data", "boundaries"),
full.names = TRUE, pattern = "(DLBCL_25kb|control_25kb).*bw")
names(insulation_score_bigwigs) <- gsub(".bw", "", basename(insulation_score_bigwigs))
insulation_score_list <- lapply(insulation_score_bigwigs, rtracklayer::import.bw)
regions <- makeGRangesFromDataFrame(comparison_regions, keep.extra.columns = TRUE)
start(regions) <- start(regions) + 1
comparison_regions <- as_tibble(comparison_regions)
comparison_regions$regions <- as.list(split(regions, 1:length(regions)))
get_var <- function(regions, insulation, ...){
gr <- subsetByOverlaps(insulation, regions)
var(gr$score, na.rm = TRUE)
}
# just using two representative insulation tracks
regions_w_var_list <- lapply(insulation_score_list[c("control_25kb_8", "DLBCL_25kb_8")], function(x){
comparison_regions %>%
mutate(variance = purrr::pmap_dbl(comparison_regions, get_var, insulation = x)) %>%
return(.)
})
regions_w_var <- bind_rows(regions_w_var_list, .id = "insulation") %>%
dplyr::select(-regions) %>%
tidyr::pivot_wider(names_from = insulation, values_from = variance)
# regions_w_var %>%
# dplyr::select(starts_with("control"), starts_with("DLBCL")) %>%
# as.matrix() %>%
# cor(use = "pairwise.complete.obs")
chess_comparisons_DLBCL_combined_w_var <- left_join(chess_comparisons_DLBCL_combined,
regions_w_var)
samples <- c("DLBCL", "control")
marginals_files <- file.path(basedir, "data/hic/", samples, paste0(samples, "_25kb_marginals.bed"))
names(marginals_files) <- gsub("_marginals.bed", "", basename(marginals_files))
marginals_list <- lapply(marginals_files, function(f){
readr::read_delim(f, delim = "\t", col_names = c("chr", "start", "end", "name", "score", "strand")) %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE)
})
get_coverage <- function(regions, marginals, ...){
gr <- subsetByOverlaps(marginals, regions)
sum(gr$score, na.rm = TRUE)
}
regions_w_coverage_list <- lapply(marginals_list, function(x){
comparison_regions %>%
mutate(coverage = purrr::pmap_dbl(comparison_regions, get_coverage, marginals = x)) %>%
return(.)
})
regions_w_coverage <- bind_rows(regions_w_coverage_list, .id = "sample") %>%
dplyr::select(-regions) %>%
# mutate(coverage = log(coverage)) %>%
tidyr::pivot_wider(names_from = sample, values_from = coverage)
# regions_w_var %>%
# dplyr::select(starts_with("control"), starts_with("DLBCL")) %>%
# as.matrix() %>%
# cor(use = "pairwise.complete.obs")
chess_comparisons_DLBCL_combined_w_var_coverage <- left_join(chess_comparisons_DLBCL_combined_w_var,
regions_w_coverage)
chess_comparisons_DLBCL_combined_w_var_coverage <- chess_comparisons_DLBCL_combined_w_var_coverage %>%
dplyr::rename("insulation_variance_control" = "control_25kb_8",
"insulation_variance_DLBCL" = "DLBCL_25kb_8",
"coverage_control" = "control_25kb",
"coverage_DLBCL" = "DLBCL_25kb")
head(chess_comparisons_DLBCL_combined_w_var_coverage)
## window_size step_size resolution ID SN_real ssim_real z_ssim_real chr
## 1 2Mb 500kb 25kb 1 0.4104954 0.2826877 -0.70530704 chr1
## 2 2Mb 500kb 25kb 2 0.4947246 0.5421304 0.85147762 chr1
## 3 2Mb 500kb 25kb 3 0.5304248 0.4904586 0.54142104 chr1
## 4 2Mb 500kb 25kb 4 0.4247878 0.4689058 0.41209361 chr1
## 5 2Mb 500kb 25kb 5 0.4559340 0.4126787 0.07470307 chr1
## 6 2Mb 500kb 25kb 6 0.4673613 0.4436457 0.26052031 chr1
## start end SN_mixed ssim_mixed z_ssim_mixed insulation_variance_control
## 1 500000 2500000 0.2281341 0.4363934 -0.24419117 0.3982246
## 2 1000000 3000000 0.2583484 0.6382701 0.93089992 0.4445615
## 3 1500000 3500000 0.2514610 0.6144611 0.79231209 0.4570081
## 4 2000000 4000000 0.2485482 0.6140805 0.79009642 0.2825730
## 5 2500000 4500000 0.2788098 0.4838057 0.03178834 0.1896939
## 6 3000000 5000000 0.2822353 0.4836198 0.03070607 0.1133514
## insulation_variance_DLBCL coverage_DLBCL coverage_control
## 1 0.2660260 339713 215010
## 2 0.3051939 369928 237218
## 3 0.3539829 349074 229303
## 4 0.2570517 349493 233177
## 5 0.2070390 339284 232132
## 6 0.1085575 349060 241125
correlations <- chess_comparisons_DLBCL_combined_w_var_coverage %>%
dplyr::select(starts_with("ssim"), starts_with("SN"),
starts_with("insulation_variance"), starts_with("coverage")) %>%
corrr::correlate(method = "pearson") %>%
dplyr::select(term, starts_with("insulation_variance"), starts_with("coverage")) %>%
dplyr::filter(startsWith(term, "ssim") | startsWith(term, "SN")) %>%
corrr::stretch()
correlations %>%
tidyr::extract(x, into = c("variable", "var_sample"), regex = "(.*)_(control|DLBCL)") %>%
tidyr::extract(y, into = c("output", "comparison"), regex = "(.*)_(real|mixed)") %>%
dplyr::filter(comparison=="real") %>%
ggplot(aes(x = output, y = var_sample, fill = r, label = signif(r, 2))) +
geom_tile() +
geom_text() +
facet_wrap(~variable,nrow = 2, strip.position = "left") +
theme_classic(base_size = 10) +
scale_fill_distiller(direction = 1) +
theme(strip.placement = "outside") +
labs(y = "", x = "", fill = "Pearson's R")
ggsave(file.path(basedir, "figures/final/Fig1_DLBCL_correlations.pdf"))
N.B. as these numbers are extracted from .mcools, these are post-filtering of binned data.
wutz_metadata <- readr::read_tsv(file.path(basedir, "metadata_2021-09-27-15h-22m.tsv"), comment = "#")
sample_names <- paste(wutz_metadata$`Condition Name`, wutz_metadata$Rep, sep = "_")
stats_files <- file.path(basedir, "data/hic/", sample_names, paste0(sample_names, "_50kb_hic_stats.txt"))
stats <- lapply(stats_files, function(f){
read.csv(f, header = FALSE,
col.names = c("class", "n"))
}) %>% setNames(sample_names) %>%
bind_rows(.id = "file")
ggplot(stats, aes(x = file, y = n)) +
geom_col() +
coord_flip() +
scale_y_continuous("Sequencing depth", labels = scales::unit_format(unit = "million", scale = 1e-6)) +
theme_classic(base_size = 10)
stats %>%
arrange(n)
## file class n
## 1 CTCFdegron_G1_auxin0_1 total 147993483
## 2 SCC1degron_G1_auxin180_1 total 151452441
## 3 wt_G1_none_1 total 171927161
## 4 CTCFdegron_G1_auxin120_1 total 183429031
## 5 SCC1degron_G1_WAPLRNAi_1 total 277647794
## 6 SCC1degron_G1_PDS5RNAi_1 total 294562323
## 7 SCC1degron_G1_noRNAi_1 total 307920917
## 8 SCC1degron_G1_auxin0_1 total 359170180
## 9 SCC1degron_prometaphase_noRNAi_1 total 369224447
Because the sequencing depths vary quite a lot, I will continue with datasets downsampled to the same number of valid read pairs.
marginals_files <- file.path(basedir, "data/hic/", sample_names, paste0(sample_names, "_50kb_marginals.bed"))
names(marginals_files) <- gsub("_marginals.bed", "", basename(marginals_files))
wutz_marginals_list <- lapply(marginals_files, function(f){
readr::read_delim(f, delim = "\t", col_names = c("chr", "start", "end", "name", "score", "strand")) %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE)
})
wutz_marginals_df <- lapply(wutz_marginals_list, as.data.frame) %>%
bind_rows(.id = "file")
wutz_percent_df <- wutz_marginals_df %>%
group_by(file) %>%
summarise(percent = 100 * sum(score > 1000) / n())
# percent_df %>%
# knitr::kable(digits = 1)
ggplot(wutz_percent_df, aes(x = file, y = percent, label = signif(percent, 3))) +
geom_col() +
geom_text(colour = "black", hjust = "outward", nudge_y = 1) +
geom_hline(yintercept = 80, linetype = 2) +
scale_y_continuous("% bins with more than 1000 valid reads", limits = c(0, 100)) +
coord_flip() +
theme_classic(base_size = 10) +
labs(x = "")
At 50 kb resolution, all samples pass the Rao et al. threshold of having 80% of bins with at least 1000 reads.
combinations <- expand.grid(sample_names, sample_names) %>%
filter(Var1 != Var2)
comparisons <- c(paste0(combinations$Var1, "_vs_", combinations$Var2))
chess_files <- list.files(file.path(basedir, "data", "chess", comparisons, "downsampled"), full.names = TRUE, pattern = "chr.*txt")
names(chess_files) <- sapply(chess_files, function(path){
parts <- tail(strsplit(path, "/")[[1]], 3)
gsub("downsampled_|.txt", "", paste(parts, collapse = "_"))
})
chess_comparisons_wutz <- lapply(chess_files, function(f){
read.table(f, sep = "\t", header = TRUE)
}) %>% bind_rows(.id = "comparison")
chess_comparisons_wutz <- chess_comparisons_wutz %>%
extract(comparison, regex = "(.*)_vs_(.*)_(chr[[:alnum:]]+)_window([[:alnum:]]+Mb)_step([[:alnum:]]+[k|M]b)_([[:alnum:]]+kb)",
into = c("query", "ref", "chr", "window_size", "step_size", "resolution"),
remove = FALSE) %>%
dplyr::filter(!is.na(ssim))
chess_comparisons_wutz <- left_join(chess_comparisons_wutz, dplyr::select(comparison_regions, -regions))
plotting_subset_wutz <- chess_comparisons_wutz %>%
dplyr::filter(query=="wt_G1_none_1") %>%
dplyr::filter(ref %in% c("CTCFdegron_G1_auxin0_1", "CTCFdegron_G1_auxin120_1",
"SCC1degron_G1_auxin0_1", "SCC1degron_G1_auxin180_1",
"SCC1degron_prometaphase_noRNAi_1")) %>%
dplyr::mutate(ref = case_when(
ref == "CTCFdegron_G1_auxin0_1" ~ "CTCF degron (no auxin)",
ref == "CTCFdegron_G1_auxin120_1" ~ "CTCF degron (auxin 2h)",
ref == "SCC1degron_G1_auxin0_1" ~ "SCC1 degron (no auxin)",
ref == "SCC1degron_G1_auxin180_1" ~ "SCC1 degron (auxin 3h)",
ref == "SCC1degron_prometaphase_noRNAi_1" ~ "SCC1 degron (no auxin, prometaphase)"
)) %>%
dplyr::mutate(ref = factor(ref, levels = c("CTCF degron (no auxin)",
"CTCF degron (auxin 2h)",
"SCC1 degron (no auxin)",
"SCC1 degron (auxin 3h)",
"SCC1 degron (no auxin, prometaphase)"),
ordered = TRUE))
Fig1_Wutz_SSIM_violin <- ggplot(plotting_subset_wutz, aes(x = ref, y = ssim)) +
geom_violin() +
stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black") +
theme_classic(base_size = 10) +
coord_flip() +
labs(x = "", y = "SSIM (WT, query)") +
scale_x_discrete(limits = rev)
Fig1_Wutz_SSIM_violin
ggsave(file.path(basedir, "figures/final/Fig1_Wutz_SSIM_violin.pdf"))
Fig1_Wutz_SN_violin <- ggplot(plotting_subset_wutz, aes(x = ref, y = SN)) +
geom_violin() +
stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black") +
theme_classic(base_size = 10) +
coord_flip() +
labs(x = "", y = "SN (WT, query)") +
scale_x_discrete(limits = rev)
Fig1_Wutz_SN_violin
ggsave(file.path(basedir, "figures/final/Fig1_Wutz_SN_violin.pdf"))
Fig1_Wutz_SSIM_chr2 <- plotting_subset_wutz %>%
filter(chr=="chr2") %>%
mutate(group = paste0(ref, ifelse(start < 90e6, 1, 2))) %>%
ggplot(aes(x = start, y = ssim, colour = ref, group = group)) +
geom_line() +
theme_classic(base_size = 10) +
scale_x_continuous(labels = scales::unit_format(unit = "Mb", scale = 1e-6)) +
labs(y = "SSIM (WT, query)", x = "", colour = "") +
scale_colour_manual(values = c("darkblue", "cornflowerblue", "darkred", "lightcoral", "grey")) +
theme(legend.position="bottom") +
guides(colour=guide_legend(nrow=2,byrow=TRUE))
Fig1_Wutz_SSIM_chr2
ggsave(file.path(basedir, "figures/final/Fig1_Wutz_SSIM_chr2.pdf"))
Fig1_Wutz_SSIM_chr16 <- plotting_subset_wutz %>%
filter(chr=="chr16") %>%
mutate(group = paste0(ref, ifelse(start < 40e6, 1, 2))) %>%
ggplot(aes(x = start, y = ssim, colour = ref, group = group)) +
geom_line() +
theme_classic(base_size = 10) +
scale_x_continuous(labels = scales::unit_format(unit = "Mb", scale = 1e-6)) +
labs(y = "SSIM (WT, query)", x = "", colour = "") +
scale_colour_manual(values = c("darkblue", "cornflowerblue", "darkred", "lightcoral", "grey")) +
theme(legend.position="bottom") +
guides(colour=guide_legend(nrow=2,byrow=TRUE))
Fig1_Wutz_SSIM_chr16
ggsave(file.path(basedir, "figures/final/Fig1_Wutz_SSIM_chr16.pdf"))
get_coverage <- function(regions, marginals, ...){
gr <- subsetByOverlaps(marginals, regions)
sum(gr$score, na.rm = TRUE)
}
comparison_regions_4Mb <- comparison_regions %>%
dplyr::filter(window_size == "4Mb") %>%
as_tibble()
regions_4Mb <- makeGRangesFromDataFrame(comparison_regions_4Mb, keep.extra.columns = TRUE)
start(regions_4Mb) <- start(regions_4Mb) + 1
comparison_regions_4Mb$regions <- as.list(split(regions_4Mb, 1:length(regions_4Mb)))
regions_w_wutz_coverage_list <- lapply(wutz_marginals_list, function(x){
comparison_regions_4Mb %>%
mutate(coverage = purrr::pmap_dbl(comparison_regions_4Mb, get_coverage, marginals = x)) %>%
return(.)
})
regions_w_wutz_coverage <- bind_rows(regions_w_wutz_coverage_list, .id = "sample") %>%
dplyr::select(-regions) %>%
# mutate(coverage = log(coverage)) %>%
tidyr::pivot_wider(names_from = sample, values_from = coverage, names_prefix = "cov_")
regions_w_wutz_coverage <- regions_w_wutz_coverage %>%
dplyr::rename_with(.fn = function(x) { gsub("_50kb", "", x) })
wutz_correlations <- chess_comparisons_wutz %>%
dplyr::select(-comparison) %>%
dplyr::filter(query=="wt_G1_none_1") %>%
dplyr::filter(ref %in% c("CTCFdegron_G1_auxin0_1", "CTCFdegron_G1_auxin120_1",
"SCC1degron_G1_auxin0_1", "SCC1degron_G1_auxin180_1",
"SCC1degron_prometaphase_noRNAi_1")) %>%
dplyr::select(-z_ssim, -SN) %>%
pivot_wider(names_from = ref, values_from = ssim, names_prefix = "ssim_") %>%
left_join(regions_w_wutz_coverage) %>%
dplyr::select(-c(query, chr, start, end, window_size, step_size, resolution, ID)) %>%
corrr::correlate() %>%
corrr::stretch()
wutz_correlations_subset <- wutz_correlations %>%
dplyr::filter(startsWith(x, "ssim") & startsWith(y, "cov")) %>%
dplyr::mutate(ssim = gsub("ssim_", "", x),
cov = gsub("cov_", "", y)) %>%
dplyr::select(-x, -y) %>%
dplyr::filter(cov == "wt_G1_none_1" | ssim==cov) %>%
dplyr::mutate(cov = ifelse(cov == "wt_G1_none_1", "WT", "self")) %>%
dplyr::mutate(ssim = case_when(
ssim == "CTCFdegron_G1_auxin0_1" ~ "CTCF degron (no auxin)",
ssim == "CTCFdegron_G1_auxin120_1" ~ "CTCF degron (auxin 2h)",
ssim == "SCC1degron_G1_auxin0_1" ~ "SCC1 degron (no auxin)",
ssim == "SCC1degron_G1_auxin180_1" ~ "SCC1 degron (auxin 3h)",
ssim == "SCC1degron_prometaphase_noRNAi_1" ~ "SCC1 degron (no auxin, prometaphase)"
)) %>%
dplyr::mutate(ssim = factor(ssim, levels = c("CTCF degron (no auxin)",
"CTCF degron (auxin 2h)",
"SCC1 degron (no auxin)",
"SCC1 degron (auxin 3h)",
"SCC1 degron (no auxin, prometaphase)"),
ordered = TRUE))
Fig1_Wutz_coverage_correlations <- ggplot(wutz_correlations_subset, aes(x = ssim, y = r)) +
geom_col() +
facet_grid(~cov) +
coord_flip() +
theme_classic(base_size = 10) +
labs(x = "", y = "Pearson's R") +
scale_x_discrete(limits = rev)
Fig1_Wutz_coverage_correlations
ggsave(file.path(basedir, "figures/final/Fig1_Wutz_coverage_correlations.pdf"))
Fig1_Wutz_SSIM_violin +
Fig1_Wutz_SN_violin + scale_x_discrete(labels = rep("", 5)) +
Fig1_Wutz_coverage_correlations + scale_x_discrete(labels = rep("", 5), limits = rev) +
plot_layout(widths = c(1, 1, 2))
ggsave(file.path(basedir, "figures/final/Fig1_Wutz_SSIM_SN_violin_correlations_assembled.pdf"))
stats_files <- list.files(file.path(basedir, "data/hic/"), pattern = "*hic_stats.txt", recursive = TRUE)
rao_stats_files <- stats_files[startsWith(stats_files, "GM12878") | startsWith(stats_files, "K562")]
rao_stats <- lapply(rao_stats_files, function(f){
read.csv(file.path(basedir, "data", "hic", f), header = FALSE,
col.names = c("file", "class", "n"))
}) %>% bind_rows() %>%
dplyr::filter(class == "valid")
rao_stats <- rao_stats %>%
mutate(file = gsub(".pairs", "", file))
ggplot(rao_stats, aes(x = file, y = n)) +
geom_col() +
coord_flip() +
scale_y_continuous("Number of valid pairs", labels = scales::unit_format(unit = "million", scale = 1e-6)) +
theme_classic(base_size = 10)
sample_names <- c(paste("GM12878", 1:9, sep = "_"),
paste("K562", 1:2, sep = "_"))
rao_marginals_files <- file.path(basedir, "data/hic/", sample_names, paste0(sample_names, "_25kb_marginals.bed"))
names(rao_marginals_files) <- gsub("_marginals.bed", "", basename(rao_marginals_files))
rao_marginals_list <- lapply(rao_marginals_files, function(f){
readr::read_delim(f, delim = "\t", col_names = c("chr", "start", "end", "name", "score", "strand")) %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE)
})
rao_marginals_df <- lapply(rao_marginals_list, as.data.frame) %>%
bind_rows(.id = "file")
rao_percent_df <- rao_marginals_df %>%
group_by(file) %>%
summarise(percent = 100 * sum(score > 1000) / n())
# percent_df %>%
# knitr::kable(digits = 1)
ggplot(rao_percent_df, aes(x = file, y = percent, label = signif(percent, 3))) +
geom_col() +
geom_text(colour = "black", hjust = "outward", nudge_y = 1) +
geom_hline(yintercept = 80, linetype = 2) +
scale_y_continuous("% bins with more than 1000 valid reads", limits = c(0, 100)) +
coord_flip() +
theme_classic(base_size = 10) +
labs(x = "")
combinations <- expand.grid(sample_names, sample_names) %>%
filter(Var1 != Var2)
comparisons <- c(paste0(combinations$Var1, "_vs_", combinations$Var2))
chess_files <- list.files(file.path(basedir, "data", "chess", comparisons), full.names = TRUE, pattern = "chr.*txt")
names(chess_files) <- sapply(chess_files, function(path){
parts <- tail(strsplit(path, "/")[[1]], 2)
gsub(".txt", "", paste(parts, collapse = "_"))
})
chess_comparisons_rao <- lapply(chess_files, function(f){
read.table(f, sep = "\t", header = TRUE)
}) %>% bind_rows(.id = "comparison") %>%
extract(comparison, regex = "(.*)_vs_(.*)_(chr[[:alnum:]]+)_window([[:alnum:]]+Mb)_step([[:alnum:]]+[k|M]b)_([[:alnum:]]+kb)",
into = c("query", "ref", "chr", "window_size", "step_size", "resolution"),
remove = FALSE) %>%
dplyr::filter(!is.na(ssim))
chess_comparisons_rao <- left_join(chess_comparisons_rao, dplyr::select(comparison_regions, -regions))
chess_comparisons_rao_plotting_subset <- chess_comparisons_rao %>%
dplyr::filter(ref == "GM12878_1") %>%
left_join(rao_stats, by = c("query" = "file")) %>%
rename("n" = "valid pairs")
ggplot(chess_comparisons_rao_plotting_subset, aes(x = query, y = ssim, fill = `valid pairs`)) +
geom_violin() +
stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black") +
theme_classic(base_size = 10) +
coord_flip() +
scale_x_discrete("", limits = rev) +
scale_fill_fermenter("Valid pairs\n(millions)",
labels = scales::unit_format(unit = "", scale = 1e-6),
direction = 1)
ggsave(file.path(basedir, "figures/final/Fig2_Rao_SSIM_violin.pdf"))
ggplot(chess_comparisons_rao_plotting_subset, aes(x = query, y = SN, fill = `valid pairs`)) +
geom_violin() +
stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black") +
theme_classic(base_size = 10) +
coord_flip() +
scale_x_discrete("", limits = rev) +
scale_fill_fermenter("Valid pairs\n(millions)",
labels = scales::unit_format(unit = "", scale = 1e-6),
direction = 1)
ggsave(file.path(basedir, "figures/final/Fig2_Rao_SN_violin.pdf"))
sample_names_short <- c("K562_1", "K562_2", "GM12878_3", "GM12878_5")
combinations <- expand.grid(sample_names_short, sample_names_short) %>%
filter(Var1 != Var2)
comparisons <- c(paste0(combinations$Var1, "_vs_", combinations$Var2))
chess_files <- list.files(file.path(basedir, "data", "chess", comparisons, "downsampled"), full.names = TRUE, pattern = "chr.*txt")
names(chess_files) <- sapply(chess_files, function(path){
parts <- tail(strsplit(path, "/")[[1]], 3)
gsub("downsampled_|.txt", "", paste(parts, collapse = "_"))
})
chess_comparisons_rao_downsampled <- lapply(chess_files, function(f){
read.table(f, sep = "\t", header = TRUE)
}) %>% bind_rows(.id = "comparison")
chess_comparisons_rao_downsampled <- chess_comparisons_rao_downsampled %>%
extract(comparison, regex = "(.*)_vs_(.*)_(chr[[:alnum:]]+)_window([[:alnum:]]+Mb)_step([[:alnum:]]+[k|M]b)_([[:alnum:]]+kb)",
into = c("query", "ref", "chr", "window_size", "step_size", "resolution"),
remove = FALSE) %>%
dplyr::filter(!is.na(ssim))
chess_comparisons_rao_downsampled <- left_join(chess_comparisons_rao_downsampled,
dplyr::select(comparison_regions, -regions))
The idea is to take the top 10% of SN and bottom 10% of SSIM from comparisons of biological replicates and use these to set thresholds for comparisons of different biological conditions.
# The SSIM Z-score was calculated per chromosome, but I want it across chromosomes, so I'll recalculate this and also calculate Z-scores for SN.
chess_comparisons_rao_downsampled <- chess_comparisons_rao_downsampled %>%
group_by(query, ref, window_size, step_size, resolution) %>%
mutate(z_SN = (SN - mean(SN)) / sd(SN),
z_ssim = (ssim - mean(ssim)) / sd(ssim))
chess_comparisons_rao_downsampled_dedup <- chess_comparisons_rao_downsampled %>%
mutate(query = factor(query, levels = sample_names_short),
ref = factor(ref, levels = sample_names_short)) %>%
dplyr::filter(as.numeric(query) < as.numeric(ref)) %>%
mutate(query = as.character(query),
ref = as.character(ref))
chess_comparisons_rao_downsampled_dedup <- chess_comparisons_rao_downsampled_dedup %>%
tidyr::unite("comparison", query, ref, sep = "_vs_", remove = FALSE)
GM12878_thresholds <- chess_comparisons_rao_downsampled_dedup %>%
dplyr::filter(startsWith(query, "GM12878") & startsWith(ref, "GM12878")) %>%
group_by(query, ref, window_size, step_size, resolution) %>%
summarise(SN_q90 = quantile(SN, 0.9),
ssim_q10 = quantile(ssim, 0.1),
SN_q80 = quantile(SN, 0.8),
ssim_q20 = quantile(ssim, 0.2))
K562_thresholds <- chess_comparisons_rao_downsampled_dedup %>%
dplyr::filter(startsWith(query, "K562") & startsWith(ref, "K562")) %>%
group_by(query, ref, window_size, step_size, resolution) %>%
summarise(SN_q90 = quantile(SN, 0.9),
ssim_q10 = quantile(ssim, 0.1),
SN_q80 = quantile(SN, 0.8),
ssim_q20 = quantile(ssim, 0.2))
all_thresholds <- bind_rows(GM12878_thresholds, K562_thresholds) %>%
tidyr::unite("comparison", query, ref, sep = "_vs_", remove = FALSE)
all_thresholds
## # A tibble: 2 x 10
## # Groups: query, ref, window_size, step_size [2]
## comparison query ref window_size step_size resolution SN_q90 ssim_q10 SN_q80
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 GM12878_3… GM12… GM12… 2Mb 500kb 25kb 0.577 0.274 0.521
## 2 K562_1_vs… K562… K562… 2Mb 500kb 25kb 0.813 0.282 0.695
## # … with 1 more variable: ssim_q20 <dbl>
Since the thresholds are very similar, I’ll use the slightly less stringent values for this analysis.
SN_threshold <- all_thresholds %>% pull(SN_q90) %>% min()
ssim_threshold <- all_thresholds %>% pull(ssim_q10) %>% max()
chess_comparisons_rao_downsampled_dedup %>%
# dplyr::filter(startsWith(query, "K562") & startsWith(ref, "GM12878")) %>%
ggplot(aes(x = ssim, y = SN)) +
geom_rect(aes(xmin=-Inf, xmax=ssim_threshold, ymin=SN_threshold, ymax=Inf), fill = "lightsteelblue") +
geom_point(size = 0.5) +
facet_grid(query~ref) +
theme_classic(base_size = 10) +
geom_hline(yintercept = SN_threshold, linetype = 2) +
geom_vline(xintercept = ssim_threshold, linetype = 2) +
theme(panel.border = element_rect(linetype="solid", colour = "black",
fill = NA, size = rel(2)),
axis.line = element_blank()) +
labs(x = "SSIM", y = "SN")
Fig2_Rao_SSIM_SN_thresholds_scatterplot <- chess_comparisons_rao_downsampled_dedup %>%
dplyr::filter(startsWith(query, "K562") & startsWith(ref, "GM12878")) %>%
ggplot(aes(x = ssim, y = SN)) +
geom_rect(aes(xmin=-Inf, xmax=ssim_threshold, ymin=SN_threshold, ymax=Inf), fill = "lightsteelblue") +
geom_point(size = 0.5) +
facet_grid(query~ref) +
theme_classic(base_size = 10) +
geom_hline(yintercept = SN_threshold, linetype = 2) +
geom_vline(xintercept = ssim_threshold, linetype = 2) +
theme(panel.border = element_rect(linetype="solid", colour = "black",
fill = NA, size = rel(2)),
axis.line = element_blank()) +
labs(x = "SSIM", y = "SN")
Fig2_Rao_SSIM_SN_thresholds_scatterplot
Fig2_Rao_SSIM_SN_thresholds_scatterplot
ggsave(file.path(basedir, "figures/final/Fig2_Rao_SSIM_SN_thresholds_scatterplot.pdf"))
regions_passing_thresholds <- chess_comparisons_rao_downsampled_dedup %>%
dplyr::filter(SN > SN_threshold & ssim < ssim_threshold)
regions_passing_thresholds
## # A tibble: 349 x 14
## # Groups: query, ref, window_size, step_size, resolution [4]
## query comparison ref chr window_size step_size resolution ID SN
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <int> <dbl>
## 1 K562_1 K562_1_vs_G… GM128… chr16 2Mb 500kb 25kb 5186 0.716
## 2 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 496 0.674
## 3 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 506 1.42
## 4 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 521 0.807
## 5 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 522 1.01
## 6 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 526 0.957
## 7 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 527 0.898
## 8 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 528 0.845
## 9 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 537 0.671
## 10 K562_1 K562_1_vs_G… GM128… chr2 2Mb 500kb 25kb 538 0.931
## # … with 339 more rows, and 5 more variables: ssim <dbl>, z_ssim <dbl>,
## # start <int>, end <int>, z_SN <dbl>
regions_passing_thresholds_n <- regions_passing_thresholds %>%
group_by(query, ref) %>%
summarise(n = n())
regions_passing_thresholds %>%
group_by(ID) %>%
summarise(n = n()) %>%
pull(n) %>% table()
## .
## 1 2 3 4
## 20 20 27 52
for (x in unique(regions_passing_thresholds$comparison)){
message(x)
regions_passing_thresholds %>%
dplyr::filter(comparison == x) %>%
arrange(-ssim) %>%
ungroup() %>%
select(chr, start, end) %>%
unique() %>%
write.table(file = file.path(basedir, "data/chess/", paste0(x, "_window2Mb_step500kb_25kb_ssim10_SN90.bed")),
col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
}
regions_passing_thresholds %>%
group_by(ID) %>%
mutate(n_passing = n()) %>%
dplyr::filter(n_passing == 4) %>%
arrange(-ssim) %>%
ungroup() %>%
select(chr, start, end) %>%
unique() %>%
write.table(file = file.path(basedir, "data/chess/", "GM12878_vs_K562_window2Mb_step500kb_25kb_ssim10_SN90.bed"),
col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
set.seed(42)
chess_comparisons_rao_downsampled_dedup %>%
dplyr::filter(!(ID %in% regions_passing_thresholds$ID)) %>%
ungroup() %>%
dplyr::slice_sample(n = 100) %>%
arrange(-ssim) %>%
select(chr, start, end) %>%
unique() %>%
write.table(file = file.path(basedir, "data/chess/", "GM12878_vs_K562_window2Mb_step500kb_25kb_negative_set.bed"),
col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
negative_set <- read.table(file.path(basedir, "data/chess/", "GM12878_vs_K562_window2Mb_step500kb_25kb_negative_set.bed"),
col.names = c("chr", "start", "end"))
negative_set <- left_join(negative_set, comparison_regions)
chess_comparisons_rao_downsampled_dedup %>%
dplyr::filter(startsWith(query, "K562") & startsWith(ref, "GM12878")) %>%
mutate(is_negative = ifelse(ID %in% negative_set$ID, "yes", "no")) %>%
ggplot(aes(x = ssim, y = SN)) +
geom_rect(aes(xmin=-Inf, xmax=ssim_threshold, ymin=SN_threshold, ymax=Inf), fill = "lightsteelblue") +
geom_point(size = 1, aes(colour = is_negative)) +
facet_grid(query~ref) +
theme_classic(base_size = 10) +
geom_hline(yintercept = SN_threshold, linetype = 2) +
geom_vline(xintercept = ssim_threshold, linetype = 2) +
theme(panel.border = element_rect(linetype="solid", colour = "black",
fill = NA, size = rel(2)),
axis.line = element_blank()) +
labs(x = "SSIM", y = "SN") +
scale_colour_manual(values = c("yes" = "red", "no" = "black"))
comparisons <- "GM12878_all_vs_K562_all"
chess_files <- list.files(file.path(basedir, "data", "chess", comparisons), full.names = TRUE, pattern = "chr.*txt")
names(chess_files) <- sapply(chess_files, function(path){
parts <- tail(strsplit(path, "/")[[1]], 2)
gsub(".txt", "", paste(parts, collapse = "_"))
})
chess_comparisons_rao_merged <- lapply(chess_files, function(f){
read.table(f, sep = "\t", header = TRUE)
}) %>% bind_rows(.id = "comparison") %>%
extract(comparison, regex = "(.*)_vs_(.*)_(chr[[:alnum:]]+)_window([[:alnum:]]+Mb)_step([[:alnum:]]+[k|M]b)_([[:alnum:]]+kb)",
into = c("query", "ref", "chr", "window_size", "step_size", "resolution"),
remove = FALSE) %>%
dplyr::filter(!is.na(ssim))
chess_comparisons_rao_merged <- left_join(chess_comparisons_rao_merged, comparison_regions)
chess_comparisons_rao_merged<- chess_comparisons_rao_merged %>%
group_by(query, ref, window_size, step_size, resolution) %>%
mutate(z_SN = (SN - mean(SN)) / sd(SN),
z_ssim = (ssim - mean(ssim)) / sd(ssim))
When comparing merged replicates of GM12878 and K562, 580 regions on chromosomes 2 and 16 have an SN value of more than 0.6, out of 580 (100 %).
In comparison, for the DLBCL-control comparison, 1906 regions out of 5432 or (35.1 %) of regions genome wide have an SN value of more than 0.6.
bind_rows(chess_comparisons_DLBCL,
chess_comparisons_rao_merged,
chess_comparisons_rao_downsampled_dedup) %>%
dplyr::filter(comparison != "mixA_vs_mixB_window2Mb_step500kb_25kb") %>%
dplyr::mutate(comparison = case_when(
comparison == "DLBCL_vs_control_window2Mb_step500kb_25kb" ~ "DLBCL_vs_control",
comparison == "GM12878_all_vs_K562_all_chr16_window2Mb_step500kb_25kb" ~ "GM12878_all_vs_K562_all",
comparison == "GM12878_all_vs_K562_all_chr2_window2Mb_step500kb_25kb" ~ "GM12878_all_vs_K562_all",
TRUE ~ comparison
)) %>%
dplyr::mutate(comparison = factor(comparison,
levels = c("K562_1_vs_GM12878_3",
"K562_1_vs_GM12878_5",
"K562_2_vs_GM12878_3",
"K562_2_vs_GM12878_5",
"GM12878_3_vs_GM12878_5",
"K562_1_vs_K562_2",
"GM12878_all_vs_K562_all",
"DLBCL_vs_control"), ordered = TRUE)) %>%
ggplot(aes(x = comparison, y = SN)) +
geom_violin() +
stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
geom = "pointrange", color = "black") +
geom_hline(yintercept = 0.6, linetype = 2, colour = "blue") +
theme_classic(base_size = 10) +
coord_flip() +
labs(x = "")
ggsave(file.path(basedir, "figures/final/Fig2_all_SN_distributions.pdf"))
This report was generated at 14:52:03, Tue Oct 12 2021.
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] patchwork_1.1.1 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [4] IRanges_2.22.2 S4Vectors_0.26.1 BiocGenerics_0.34.0
## [7] ggplot2_3.3.3 tidyr_1.1.3 dplyr_1.0.5
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 matrixStats_0.59.0
## [3] RColorBrewer_1.1-2 rprojroot_2.0.2
## [5] tools_4.0.3 backports_1.2.1
## [7] bslib_0.2.4 utf8_1.2.1
## [9] R6_2.5.0 rpart_4.1-15
## [11] Hmisc_4.5-0 DBI_1.1.1
## [13] colorspace_2.0-1 nnet_7.3-15
## [15] withr_2.4.2 gridExtra_2.3
## [17] tidyselect_1.1.0 compiler_4.0.3
## [19] cli_2.5.0 Biobase_2.48.0
## [21] htmlTable_2.1.0 DelayedArray_0.14.1
## [23] rtracklayer_1.48.0 labeling_0.4.2
## [25] sass_0.3.1 checkmate_2.0.0
## [27] scales_1.1.1 readr_1.4.0
## [29] corrr_0.4.3 stringr_1.4.0
## [31] digest_0.6.27 Rsamtools_2.4.0
## [33] foreign_0.8-81 rmarkdown_2.7
## [35] XVector_0.28.0 base64enc_0.1-3
## [37] jpeg_0.1-8.1 pkgconfig_2.0.3
## [39] htmltools_0.5.1.1 highr_0.8
## [41] htmlwidgets_1.5.3 rlang_0.4.11
## [43] rstudioapi_0.13 jquerylib_0.1.3
## [45] farver_2.1.0 generics_0.1.0
## [47] jsonlite_1.7.2 BiocParallel_1.22.0
## [49] RCurl_1.98-1.2 magrittr_2.0.1
## [51] GenomeInfoDbData_1.2.3 Formula_1.2-4
## [53] Matrix_1.3-4 munsell_0.5.0
## [55] fansi_0.5.0 lifecycle_1.0.0
## [57] stringi_1.5.3 yaml_2.2.1
## [59] SummarizedExperiment_1.18.2 zlibbioc_1.34.0
## [61] grid_4.0.3 crayon_1.4.1
## [63] lattice_0.20-41 Biostrings_2.56.0
## [65] splines_4.0.3 hms_1.0.0
## [67] knitr_1.31 pillar_1.6.1
## [69] codetools_0.2-18 XML_3.99-0.5
## [71] glue_1.4.2 drat_0.1.8
## [73] evaluate_0.14 latticeExtra_0.6-29
## [75] data.table_1.14.0 vctrs_0.3.8
## [77] png_0.1-7 gtable_0.3.0
## [79] purrr_0.3.4 assertthat_0.2.1
## [81] xfun_0.22 broom_0.7.5
## [83] survival_3.2-9 tibble_3.1.2
## [85] GenomicAlignments_1.24.0 cluster_2.1.1
## [87] ellipsis_0.3.2 here_1.0.1